{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:17.953374Z",
     "start_time": "2021-09-28T08:10:17.666533Z"
    }
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.simplefilter('ignore')\n",
    "\n",
    "import os\n",
    "import re\n",
    "import random\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "pd.set_option('max_colwidth', 400)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:18.170092Z",
     "start_time": "2021-09-28T08:10:17.955552Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>content</th>\n",
       "      <th>character</th>\n",
       "      <th>emotions</th>\n",
       "      <th>movie</th>\n",
       "      <th>scene</th>\n",
       "      <th>movie_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1171_0001_A_1</td>\n",
       "      <td>天空下着暴雨，o2正在给c1穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。</td>\n",
       "      <td>o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1171_0001_A_2</td>\n",
       "      <td>天空下着暴雨，o2正在给c1穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。</td>\n",
       "      <td>c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1171_0001_A_3</td>\n",
       "      <td>o2一手拿着一个行李，一路小跑着把c1带到了文工团门口。</td>\n",
       "      <td>o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1171_0001_A_4</td>\n",
       "      <td>o2一手拿着一个行李，一路小跑着把c1带到了文工团门口。</td>\n",
       "      <td>c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1171_0001_A_5</td>\n",
       "      <td>o2停下来接过c1手里的行李：你妈妈交待我了，等领了军装一定要照张相寄回去，让街坊邻居都知道你当兵了。</td>\n",
       "      <td>o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              id                                              content  \\\n",
       "0  1171_0001_A_1            天空下着暴雨，o2正在给c1穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。   \n",
       "1  1171_0001_A_2            天空下着暴雨，o2正在给c1穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。   \n",
       "2  1171_0001_A_3                         o2一手拿着一个行李，一路小跑着把c1带到了文工团门口。   \n",
       "3  1171_0001_A_4                         o2一手拿着一个行李，一路小跑着把c1带到了文工团门口。   \n",
       "4  1171_0001_A_5  o2停下来接过c1手里的行李：你妈妈交待我了，等领了军装一定要照张相寄回去，让街坊邻居都知道你当兵了。   \n",
       "\n",
       "  character     emotions movie scene movie_id  \n",
       "0        o2  0,0,0,0,0,0  1171  0001        1  \n",
       "1        c1  0,0,0,0,0,0  1171  0001        2  \n",
       "2        o2  0,0,0,0,0,0  1171  0001        3  \n",
       "3        c1  0,0,0,0,0,0  1171  0001        4  \n",
       "4        o2  0,0,0,0,0,0  1171  0001        5  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train = pd.read_csv('raw_data/train_dataset_v2.tsv', sep='\\t')\n",
    "tmp = pd.DataFrame(train['id'].str.split('_', expand=True))\n",
    "tmp.columns = ['movie', 'scene', 'A', 'movie_id']\n",
    "for col in ['movie', 'scene', 'movie_id']:\n",
    "    train[col] = tmp[col].values\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:18.272660Z",
     "start_time": "2021-09-28T08:10:18.172105Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>content</th>\n",
       "      <th>character</th>\n",
       "      <th>movie</th>\n",
       "      <th>scene</th>\n",
       "      <th>movie_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>34170_0002_A_12</td>\n",
       "      <td>穿着背心的b1醒来，看看手机，三点了。</td>\n",
       "      <td>b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0002</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>34170_0002_A_14</td>\n",
       "      <td>b1走出卧室。</td>\n",
       "      <td>b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0002</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>34170_0003_A_16</td>\n",
       "      <td>b1拿着手机，点开计时功能。</td>\n",
       "      <td>b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>34170_0003_A_17</td>\n",
       "      <td>b1站在淋浴头下面，水从b1的头和脸上冲刷而过。</td>\n",
       "      <td>b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>34170_0003_A_18</td>\n",
       "      <td>b1摈着呼吸。</td>\n",
       "      <td>b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                id                   content character  movie scene movie_id\n",
       "0  34170_0002_A_12       穿着背心的b1醒来，看看手机，三点了。        b1  34170  0002       12\n",
       "1  34170_0002_A_14                   b1走出卧室。        b1  34170  0002       14\n",
       "2  34170_0003_A_16            b1拿着手机，点开计时功能。        b1  34170  0003       16\n",
       "3  34170_0003_A_17  b1站在淋浴头下面，水从b1的头和脸上冲刷而过。        b1  34170  0003       17\n",
       "4  34170_0003_A_18                   b1摈着呼吸。        b1  34170  0003       18"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test = pd.read_csv('raw_data/test_dataset.tsv', sep='\\t')\n",
    "tmp = pd.DataFrame(test['id'].str.split('_', expand=True))\n",
    "tmp.columns = ['movie', 'scene', 'A', 'movie_id']\n",
    "for col in ['movie', 'scene', 'movie_id']:\n",
    "    test[col] = tmp[col].values\n",
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:18.328362Z",
     "start_time": "2021-09-28T08:10:18.274916Z"
    }
   },
   "outputs": [],
   "source": [
    "train['character'] = train['movie'].astype(str) + '_' + train['character'].astype(str)\n",
    "test['character'] = test['movie'].astype(str) + '_' + test['character'].astype(str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:18.339227Z",
     "start_time": "2021-09-28T08:10:18.330402Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(607, 372)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['character'].nunique(), test['character'].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:18.343320Z",
     "start_time": "2021-09-28T08:10:18.340883Z"
    }
   },
   "outputs": [],
   "source": [
    "FIRST_NAMES = '羿祥惠盛捷霞阳豪誉涵颖梅湘丹勇苗悦朝君杰毓乐曦瑶全恒裕帅馨秋山诗东雯紫木水骏昊艳宗国源莲子锦尔蕾兵天钰财桥轩桐海运坤信卿诚欣茂明晓月韬泳绮侦熙龙舟雨晴元峻程金宇启浩莉彤槐巧艺伟伊扬洋琪正森文鹏辉泽婷美超玉娴智敬奎强玄心高嵘思朗萱昆宸甜凌俊治云仕亭苹喜寅书华瑜晨益仁璇满贵利沁淳林伯晞嘉辰'\n",
    "SECOND_NAMES = '李王张刘陈杨赵黄周吴徐孙胡朱高林何郭马罗梁宋郑谢韩唐冯于'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:18.348692Z",
     "start_time": "2021-09-28T08:10:18.344734Z"
    }
   },
   "outputs": [],
   "source": [
    "def gen_names():\n",
    "    f1 = FIRST_NAMES[random.randint(0, len(FIRST_NAMES)-1)]\n",
    "    f2 = FIRST_NAMES[random.randint(0, len(FIRST_NAMES)-1)]\n",
    "    s1 = SECOND_NAMES[random.randint(0, len(SECOND_NAMES)-1)]\n",
    "    return f'{s1}{f1}{f2}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:18.361909Z",
     "start_time": "2021-09-28T08:10:18.351177Z"
    }
   },
   "outputs": [],
   "source": [
    "train_characters = train['character'].unique()\n",
    "test_characters = test['character'].unique()\n",
    "\n",
    "train_mapping = dict()\n",
    "for c in train_characters:\n",
    "    train_mapping[c] = gen_names()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:18.389216Z",
     "start_time": "2021-09-28T08:10:18.363576Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>content</th>\n",
       "      <th>character</th>\n",
       "      <th>emotions</th>\n",
       "      <th>movie</th>\n",
       "      <th>scene</th>\n",
       "      <th>movie_id</th>\n",
       "      <th>character_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1171_0001_A_1</td>\n",
       "      <td>天空下着暴雨，o2正在给c1穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>1</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1171_0001_A_2</td>\n",
       "      <td>天空下着暴雨，o2正在给c1穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>2</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1171_0001_A_3</td>\n",
       "      <td>o2一手拿着一个行李，一路小跑着把c1带到了文工团门口。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>3</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1171_0001_A_4</td>\n",
       "      <td>o2一手拿着一个行李，一路小跑着把c1带到了文工团门口。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>4</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1171_0001_A_5</td>\n",
       "      <td>o2停下来接过c1手里的行李：你妈妈交待我了，等领了军装一定要照张相寄回去，让街坊邻居都知道你当兵了。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>5</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1171_0001_A_6</td>\n",
       "      <td>o2停下来接过c1手里的行李：你妈妈交待我了，等领了军装一定要照张相寄回去，让街坊邻居都知道你当兵了。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>6</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1171_0001_A_7</td>\n",
       "      <td>c1开心地点了点头。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,1,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>7</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1171_0001_A_8</td>\n",
       "      <td>o2凑近c1小声：办入伍证审的时候，派出所的民警跟我说，你的亲生父亲还在劳改，但是你跟他划清了界限，改姓了你继父的姓，所以出身这一栏，我就给你填革干了，进了团不要跟别人说这件事，我也不会说的。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>8</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1171_0001_A_9</td>\n",
       "      <td>o2凑近c1小声：办入伍证审的时候，派出所的民警跟我说，你的亲生父亲还在劳改，但是你跟他划清了界限，改姓了你继父的姓，所以出身这一栏，我就给你填革干了，进了团不要跟别人说这件事，我也不会说的。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>9</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1171_0001_A_10</td>\n",
       "      <td>c1再次微笑着点头，然后举手敬礼，但是手的形状却是弯的。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,1,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>10</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1171_0002_A_18</td>\n",
       "      <td>雨已经停了。</td>\n",
       "      <td>1171_nan</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1171</td>\n",
       "      <td>0002</td>\n",
       "      <td>18</td>\n",
       "      <td>于海东</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>1171_0001_A_11</td>\n",
       "      <td>o2笑了笑：军礼不是这么敬的。五指并拢，大臂带动小臂，举到齐眉。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,1,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>11</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1171_0001_A_12</td>\n",
       "      <td>o2示范了一个动作，c1照做。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>12</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1171_0001_A_13</td>\n",
       "      <td>o2示范了一个动作，c1照做。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>13</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1171_0001_A_14</td>\n",
       "      <td>o2：礼毕。（再次举手敬礼）敬礼。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>14</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1171_0001_A_15</td>\n",
       "      <td>c1照做。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>15</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1171_0001_A_16</td>\n",
       "      <td>b1画外音：我叫b1，上个世纪七十年代，我在祖国大西南的一个省军区的文工团里服役，我是一名舞蹈演员，团里的人都叫我小p2。我要给你们讲的是我们文工团的故事，但在这个故事里，我不是主角。主角应该是他们俩，他叫o2，那时我们歌颂默默无闻的英雄，歌颂平凡中的伟大，就是歌颂o2这种人，穿雨衣的那个女孩儿，她叫c1，是我们舞蹈队托o2接来的新兵，她和o2几十年后的结局还要追溯到o2带她走进文工团的这一天。</td>\n",
       "      <td>1171_b1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>16</td>\n",
       "      <td>黄仁誉</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1171_0002_A_19</td>\n",
       "      <td>o2帮忙拿着行李领着c1在和门口站岗的哨兵敬礼后走进了文工团。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0002</td>\n",
       "      <td>19</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>1171_0003_A_22</td>\n",
       "      <td>练习室里，一边是穿着舞蹈服的女演员，另一边是伴奏的乐队。</td>\n",
       "      <td>1171_nan</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1171</td>\n",
       "      <td>0003</td>\n",
       "      <td>22</td>\n",
       "      <td>于海东</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1171_0003_A_28</td>\n",
       "      <td>舞蹈结束，女舞蹈演员摆出了一个造型。</td>\n",
       "      <td>1171_nan</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1171</td>\n",
       "      <td>0003</td>\n",
       "      <td>28</td>\n",
       "      <td>于海东</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                id  \\\n",
       "0    1171_0001_A_1   \n",
       "1    1171_0001_A_2   \n",
       "2    1171_0001_A_3   \n",
       "3    1171_0001_A_4   \n",
       "4    1171_0001_A_5   \n",
       "5    1171_0001_A_6   \n",
       "6    1171_0001_A_7   \n",
       "7    1171_0001_A_8   \n",
       "8    1171_0001_A_9   \n",
       "9   1171_0001_A_10   \n",
       "10  1171_0002_A_18   \n",
       "11  1171_0001_A_11   \n",
       "12  1171_0001_A_12   \n",
       "13  1171_0001_A_13   \n",
       "14  1171_0001_A_14   \n",
       "15  1171_0001_A_15   \n",
       "16  1171_0001_A_16   \n",
       "17  1171_0002_A_19   \n",
       "18  1171_0003_A_22   \n",
       "19  1171_0003_A_28   \n",
       "\n",
       "                                                                                                                                                                                                    content  \\\n",
       "0                                                                                                                                                                 天空下着暴雨，o2正在给c1穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。   \n",
       "1                                                                                                                                                                 天空下着暴雨，o2正在给c1穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。   \n",
       "2                                                                                                                                                                              o2一手拿着一个行李，一路小跑着把c1带到了文工团门口。   \n",
       "3                                                                                                                                                                              o2一手拿着一个行李，一路小跑着把c1带到了文工团门口。   \n",
       "4                                                                                                                                                       o2停下来接过c1手里的行李：你妈妈交待我了，等领了军装一定要照张相寄回去，让街坊邻居都知道你当兵了。   \n",
       "5                                                                                                                                                       o2停下来接过c1手里的行李：你妈妈交待我了，等领了军装一定要照张相寄回去，让街坊邻居都知道你当兵了。   \n",
       "6                                                                                                                                                                                                c1开心地点了点头。   \n",
       "7                                                                                                          o2凑近c1小声：办入伍证审的时候，派出所的民警跟我说，你的亲生父亲还在劳改，但是你跟他划清了界限，改姓了你继父的姓，所以出身这一栏，我就给你填革干了，进了团不要跟别人说这件事，我也不会说的。   \n",
       "8                                                                                                          o2凑近c1小声：办入伍证审的时候，派出所的民警跟我说，你的亲生父亲还在劳改，但是你跟他划清了界限，改姓了你继父的姓，所以出身这一栏，我就给你填革干了，进了团不要跟别人说这件事，我也不会说的。   \n",
       "9                                                                                                                                                                              c1再次微笑着点头，然后举手敬礼，但是手的形状却是弯的。   \n",
       "10                                                                                                                                                                                                   雨已经停了。   \n",
       "11                                                                                                                                                                         o2笑了笑：军礼不是这么敬的。五指并拢，大臂带动小臂，举到齐眉。   \n",
       "12                                                                                                                                                                                          o2示范了一个动作，c1照做。   \n",
       "13                                                                                                                                                                                          o2示范了一个动作，c1照做。   \n",
       "14                                                                                                                                                                                        o2：礼毕。（再次举手敬礼）敬礼。   \n",
       "15                                                                                                                                                                                                    c1照做。   \n",
       "16  b1画外音：我叫b1，上个世纪七十年代，我在祖国大西南的一个省军区的文工团里服役，我是一名舞蹈演员，团里的人都叫我小p2。我要给你们讲的是我们文工团的故事，但在这个故事里，我不是主角。主角应该是他们俩，他叫o2，那时我们歌颂默默无闻的英雄，歌颂平凡中的伟大，就是歌颂o2这种人，穿雨衣的那个女孩儿，她叫c1，是我们舞蹈队托o2接来的新兵，她和o2几十年后的结局还要追溯到o2带她走进文工团的这一天。   \n",
       "17                                                                                                                                                                          o2帮忙拿着行李领着c1在和门口站岗的哨兵敬礼后走进了文工团。   \n",
       "18                                                                                                                                                                             练习室里，一边是穿着舞蹈服的女演员，另一边是伴奏的乐队。   \n",
       "19                                                                                                                                                                                       舞蹈结束，女舞蹈演员摆出了一个造型。   \n",
       "\n",
       "   character     emotions movie scene movie_id character_name  \n",
       "0    1171_o2  0,0,0,0,0,0  1171  0001        1            何仁晴  \n",
       "1    1171_c1  0,0,0,0,0,0  1171  0001        2            刘昆诚  \n",
       "2    1171_o2  0,0,0,0,0,0  1171  0001        3            何仁晴  \n",
       "3    1171_c1  0,0,0,0,0,0  1171  0001        4            刘昆诚  \n",
       "4    1171_o2  0,0,0,0,0,0  1171  0001        5            何仁晴  \n",
       "5    1171_c1  0,0,0,0,0,0  1171  0001        6            刘昆诚  \n",
       "6    1171_c1  0,1,0,0,0,0  1171  0001        7            刘昆诚  \n",
       "7    1171_o2  0,0,0,0,0,0  1171  0001        8            何仁晴  \n",
       "8    1171_c1  0,0,0,0,0,0  1171  0001        9            刘昆诚  \n",
       "9    1171_c1  0,1,0,0,0,0  1171  0001       10            刘昆诚  \n",
       "10  1171_nan          NaN  1171  0002       18            于海东  \n",
       "11   1171_o2  0,1,0,0,0,0  1171  0001       11            何仁晴  \n",
       "12   1171_o2  0,0,0,0,0,0  1171  0001       12            何仁晴  \n",
       "13   1171_c1  0,0,0,0,0,0  1171  0001       13            刘昆诚  \n",
       "14   1171_o2  0,0,0,0,0,0  1171  0001       14            何仁晴  \n",
       "15   1171_c1  0,0,0,0,0,0  1171  0001       15            刘昆诚  \n",
       "16   1171_b1  0,0,0,0,0,0  1171  0001       16            黄仁誉  \n",
       "17   1171_o2  0,0,0,0,0,0  1171  0002       19            何仁晴  \n",
       "18  1171_nan          NaN  1171  0003       22            于海东  \n",
       "19  1171_nan          NaN  1171  0003       28            于海东  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['character_name'] = train['character'].map(train_mapping)\n",
    "train.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:19.416228Z",
     "start_time": "2021-09-28T08:10:18.462140Z"
    }
   },
   "outputs": [],
   "source": [
    "def replace_text(text, movie, mapping):\n",
    "    character_list = re.findall(r'[a-z][0-9]', text)\n",
    "    for c in character_list:\n",
    "        if f'{movie}_{c}' in mapping:\n",
    "            text = text.replace(c, mapping[f'{movie}_{c}'])\n",
    "    return text\n",
    "\n",
    "train['content'] = train.apply(lambda row: replace_text(row['content'], row['movie'], train_mapping), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:19.436607Z",
     "start_time": "2021-09-28T08:10:19.418295Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>content</th>\n",
       "      <th>character</th>\n",
       "      <th>emotions</th>\n",
       "      <th>movie</th>\n",
       "      <th>scene</th>\n",
       "      <th>movie_id</th>\n",
       "      <th>character_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1171_0001_A_1</td>\n",
       "      <td>天空下着暴雨，何仁晴正在给刘昆诚穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>1</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1171_0001_A_2</td>\n",
       "      <td>天空下着暴雨，何仁晴正在给刘昆诚穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>2</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1171_0001_A_3</td>\n",
       "      <td>何仁晴一手拿着一个行李，一路小跑着把刘昆诚带到了文工团门口。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>3</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1171_0001_A_4</td>\n",
       "      <td>何仁晴一手拿着一个行李，一路小跑着把刘昆诚带到了文工团门口。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>4</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1171_0001_A_5</td>\n",
       "      <td>何仁晴停下来接过刘昆诚手里的行李：你妈妈交待我了，等领了军装一定要照张相寄回去，让街坊邻居都知道你当兵了。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>5</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1171_0001_A_6</td>\n",
       "      <td>何仁晴停下来接过刘昆诚手里的行李：你妈妈交待我了，等领了军装一定要照张相寄回去，让街坊邻居都知道你当兵了。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>6</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1171_0001_A_7</td>\n",
       "      <td>刘昆诚开心地点了点头。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,1,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>7</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1171_0001_A_8</td>\n",
       "      <td>何仁晴凑近刘昆诚小声：办入伍证审的时候，派出所的民警跟我说，你的亲生父亲还在劳改，但是你跟他划清了界限，改姓了你继父的姓，所以出身这一栏，我就给你填革干了，进了团不要跟别人说这件事，我也不会说的。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>8</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1171_0001_A_9</td>\n",
       "      <td>何仁晴凑近刘昆诚小声：办入伍证审的时候，派出所的民警跟我说，你的亲生父亲还在劳改，但是你跟他划清了界限，改姓了你继父的姓，所以出身这一栏，我就给你填革干了，进了团不要跟别人说这件事，我也不会说的。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>9</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1171_0001_A_10</td>\n",
       "      <td>刘昆诚再次微笑着点头，然后举手敬礼，但是手的形状却是弯的。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,1,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>10</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1171_0002_A_18</td>\n",
       "      <td>雨已经停了。</td>\n",
       "      <td>1171_nan</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1171</td>\n",
       "      <td>0002</td>\n",
       "      <td>18</td>\n",
       "      <td>于海东</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>1171_0001_A_11</td>\n",
       "      <td>何仁晴笑了笑：军礼不是这么敬的。五指并拢，大臂带动小臂，举到齐眉。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,1,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>11</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1171_0001_A_12</td>\n",
       "      <td>何仁晴示范了一个动作，刘昆诚照做。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>12</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1171_0001_A_13</td>\n",
       "      <td>何仁晴示范了一个动作，刘昆诚照做。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>13</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1171_0001_A_14</td>\n",
       "      <td>何仁晴：礼毕。（再次举手敬礼）敬礼。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>14</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1171_0001_A_15</td>\n",
       "      <td>刘昆诚照做。</td>\n",
       "      <td>1171_c1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>15</td>\n",
       "      <td>刘昆诚</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1171_0001_A_16</td>\n",
       "      <td>黄仁誉画外音：我叫黄仁誉，上个世纪七十年代，我在祖国大西南的一个省军区的文工团里服役，我是一名舞蹈演员，团里的人都叫我小p2。我要给你们讲的是我们文工团的故事，但在这个故事里，我不是主角。主角应该是他们俩，他叫何仁晴，那时我们歌颂默默无闻的英雄，歌颂平凡中的伟大，就是歌颂何仁晴这种人，穿雨衣的那个女孩儿，她叫刘昆诚，是我们舞蹈队托何仁晴接来的新兵，她和何仁晴几十年后的结局还要追溯到何仁晴带她走进文工团的这一天。</td>\n",
       "      <td>1171_b1</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0001</td>\n",
       "      <td>16</td>\n",
       "      <td>黄仁誉</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1171_0002_A_19</td>\n",
       "      <td>何仁晴帮忙拿着行李领着刘昆诚在和门口站岗的哨兵敬礼后走进了文工团。</td>\n",
       "      <td>1171_o2</td>\n",
       "      <td>0,0,0,0,0,0</td>\n",
       "      <td>1171</td>\n",
       "      <td>0002</td>\n",
       "      <td>19</td>\n",
       "      <td>何仁晴</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>1171_0003_A_22</td>\n",
       "      <td>练习室里，一边是穿着舞蹈服的女演员，另一边是伴奏的乐队。</td>\n",
       "      <td>1171_nan</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1171</td>\n",
       "      <td>0003</td>\n",
       "      <td>22</td>\n",
       "      <td>于海东</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1171_0003_A_28</td>\n",
       "      <td>舞蹈结束，女舞蹈演员摆出了一个造型。</td>\n",
       "      <td>1171_nan</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1171</td>\n",
       "      <td>0003</td>\n",
       "      <td>28</td>\n",
       "      <td>于海东</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                id  \\\n",
       "0    1171_0001_A_1   \n",
       "1    1171_0001_A_2   \n",
       "2    1171_0001_A_3   \n",
       "3    1171_0001_A_4   \n",
       "4    1171_0001_A_5   \n",
       "5    1171_0001_A_6   \n",
       "6    1171_0001_A_7   \n",
       "7    1171_0001_A_8   \n",
       "8    1171_0001_A_9   \n",
       "9   1171_0001_A_10   \n",
       "10  1171_0002_A_18   \n",
       "11  1171_0001_A_11   \n",
       "12  1171_0001_A_12   \n",
       "13  1171_0001_A_13   \n",
       "14  1171_0001_A_14   \n",
       "15  1171_0001_A_15   \n",
       "16  1171_0001_A_16   \n",
       "17  1171_0002_A_19   \n",
       "18  1171_0003_A_22   \n",
       "19  1171_0003_A_28   \n",
       "\n",
       "                                                                                                                                                                                                            content  \\\n",
       "0                                                                                                                                                                       天空下着暴雨，何仁晴正在给刘昆诚穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。   \n",
       "1                                                                                                                                                                       天空下着暴雨，何仁晴正在给刘昆诚穿雨衣，他自己却只穿着单薄的军装，完全暴露在大雨之中。   \n",
       "2                                                                                                                                                                                    何仁晴一手拿着一个行李，一路小跑着把刘昆诚带到了文工团门口。   \n",
       "3                                                                                                                                                                                    何仁晴一手拿着一个行李，一路小跑着把刘昆诚带到了文工团门口。   \n",
       "4                                                                                                                                                             何仁晴停下来接过刘昆诚手里的行李：你妈妈交待我了，等领了军装一定要照张相寄回去，让街坊邻居都知道你当兵了。   \n",
       "5                                                                                                                                                             何仁晴停下来接过刘昆诚手里的行李：你妈妈交待我了，等领了军装一定要照张相寄回去，让街坊邻居都知道你当兵了。   \n",
       "6                                                                                                                                                                                                       刘昆诚开心地点了点头。   \n",
       "7                                                                                                                何仁晴凑近刘昆诚小声：办入伍证审的时候，派出所的民警跟我说，你的亲生父亲还在劳改，但是你跟他划清了界限，改姓了你继父的姓，所以出身这一栏，我就给你填革干了，进了团不要跟别人说这件事，我也不会说的。   \n",
       "8                                                                                                                何仁晴凑近刘昆诚小声：办入伍证审的时候，派出所的民警跟我说，你的亲生父亲还在劳改，但是你跟他划清了界限，改姓了你继父的姓，所以出身这一栏，我就给你填革干了，进了团不要跟别人说这件事，我也不会说的。   \n",
       "9                                                                                                                                                                                     刘昆诚再次微笑着点头，然后举手敬礼，但是手的形状却是弯的。   \n",
       "10                                                                                                                                                                                                           雨已经停了。   \n",
       "11                                                                                                                                                                                何仁晴笑了笑：军礼不是这么敬的。五指并拢，大臂带动小臂，举到齐眉。   \n",
       "12                                                                                                                                                                                                何仁晴示范了一个动作，刘昆诚照做。   \n",
       "13                                                                                                                                                                                                何仁晴示范了一个动作，刘昆诚照做。   \n",
       "14                                                                                                                                                                                               何仁晴：礼毕。（再次举手敬礼）敬礼。   \n",
       "15                                                                                                                                                                                                           刘昆诚照做。   \n",
       "16  黄仁誉画外音：我叫黄仁誉，上个世纪七十年代，我在祖国大西南的一个省军区的文工团里服役，我是一名舞蹈演员，团里的人都叫我小p2。我要给你们讲的是我们文工团的故事，但在这个故事里，我不是主角。主角应该是他们俩，他叫何仁晴，那时我们歌颂默默无闻的英雄，歌颂平凡中的伟大，就是歌颂何仁晴这种人，穿雨衣的那个女孩儿，她叫刘昆诚，是我们舞蹈队托何仁晴接来的新兵，她和何仁晴几十年后的结局还要追溯到何仁晴带她走进文工团的这一天。   \n",
       "17                                                                                                                                                                                何仁晴帮忙拿着行李领着刘昆诚在和门口站岗的哨兵敬礼后走进了文工团。   \n",
       "18                                                                                                                                                                                     练习室里，一边是穿着舞蹈服的女演员，另一边是伴奏的乐队。   \n",
       "19                                                                                                                                                                                               舞蹈结束，女舞蹈演员摆出了一个造型。   \n",
       "\n",
       "   character     emotions movie scene movie_id character_name  \n",
       "0    1171_o2  0,0,0,0,0,0  1171  0001        1            何仁晴  \n",
       "1    1171_c1  0,0,0,0,0,0  1171  0001        2            刘昆诚  \n",
       "2    1171_o2  0,0,0,0,0,0  1171  0001        3            何仁晴  \n",
       "3    1171_c1  0,0,0,0,0,0  1171  0001        4            刘昆诚  \n",
       "4    1171_o2  0,0,0,0,0,0  1171  0001        5            何仁晴  \n",
       "5    1171_c1  0,0,0,0,0,0  1171  0001        6            刘昆诚  \n",
       "6    1171_c1  0,1,0,0,0,0  1171  0001        7            刘昆诚  \n",
       "7    1171_o2  0,0,0,0,0,0  1171  0001        8            何仁晴  \n",
       "8    1171_c1  0,0,0,0,0,0  1171  0001        9            刘昆诚  \n",
       "9    1171_c1  0,1,0,0,0,0  1171  0001       10            刘昆诚  \n",
       "10  1171_nan          NaN  1171  0002       18            于海东  \n",
       "11   1171_o2  0,1,0,0,0,0  1171  0001       11            何仁晴  \n",
       "12   1171_o2  0,0,0,0,0,0  1171  0001       12            何仁晴  \n",
       "13   1171_c1  0,0,0,0,0,0  1171  0001       13            刘昆诚  \n",
       "14   1171_o2  0,0,0,0,0,0  1171  0001       14            何仁晴  \n",
       "15   1171_c1  0,0,0,0,0,0  1171  0001       15            刘昆诚  \n",
       "16   1171_b1  0,0,0,0,0,0  1171  0001       16            黄仁誉  \n",
       "17   1171_o2  0,0,0,0,0,0  1171  0002       19            何仁晴  \n",
       "18  1171_nan          NaN  1171  0003       22            于海东  \n",
       "19  1171_nan          NaN  1171  0003       28            于海东  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:19.441207Z",
     "start_time": "2021-09-28T08:10:19.438087Z"
    }
   },
   "outputs": [],
   "source": [
    "# Pool of candidate names for the test split: every value stored in train_mapping.\n",
    "train_res = list(train_mapping.values())\n",
    "# Shuffle with a dedicated, seeded generator so that Restart & Run All always\n",
    "# yields the same order (and therefore the same test names in the exported CSV).\n",
    "# NOTE(review): if an earlier cell already seeds the global random module, the order\n",
    "# produced here is still fixed, but it differs from the previous one.\n",
    "random.Random(2021).shuffle(train_res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:19.447579Z",
     "start_time": "2021-09-28T08:10:19.442685Z"
    }
   },
   "outputs": [],
   "source": [
    "# Test characters receive names drawn from the shuffled pool of names already used in train.\n",
    "# Names are taken by position, so an IndexError is raised when there are more\n",
    "# test characters than names in the pool.\n",
    "\n",
    "test_mapping = {\n",
    "    character: train_res[position]\n",
    "    for position, character in enumerate(test_characters)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:19.468546Z",
     "start_time": "2021-09-28T08:10:19.449448Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>content</th>\n",
       "      <th>character</th>\n",
       "      <th>movie</th>\n",
       "      <th>scene</th>\n",
       "      <th>movie_id</th>\n",
       "      <th>character_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>34170_0002_A_12</td>\n",
       "      <td>穿着背心的b1醒来，看看手机，三点了。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0002</td>\n",
       "      <td>12</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>34170_0002_A_14</td>\n",
       "      <td>b1走出卧室。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0002</td>\n",
       "      <td>14</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>34170_0003_A_16</td>\n",
       "      <td>b1拿着手机，点开计时功能。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>16</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>34170_0003_A_17</td>\n",
       "      <td>b1站在淋浴头下面，水从b1的头和脸上冲刷而过。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>17</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>34170_0003_A_18</td>\n",
       "      <td>b1摈着呼吸。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>18</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>34170_0003_A_20</td>\n",
       "      <td>b1睁开了眼，喘了口气。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>20</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>34170_0003_A_21</td>\n",
       "      <td>b1看了看手机，大概四分钟。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>21</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>34170_0004_A_24</td>\n",
       "      <td>i3躺在被窝里熟睡。</td>\n",
       "      <td>34170_i3</td>\n",
       "      <td>34170</td>\n",
       "      <td>0004</td>\n",
       "      <td>24</td>\n",
       "      <td>马舟宇</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>34170_0004_A_25</td>\n",
       "      <td>b1蹲在床边，拉着i3的手，轻声说道：满儿，爸爸今晚下班以后回来给你过生日。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0004</td>\n",
       "      <td>25</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>34170_0004_A_26</td>\n",
       "      <td>b1蹲在床边，拉着i3的手，轻声说道：满儿，爸爸今晚下班以后回来给你过生日。</td>\n",
       "      <td>34170_i3</td>\n",
       "      <td>34170</td>\n",
       "      <td>0004</td>\n",
       "      <td>26</td>\n",
       "      <td>马舟宇</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                id                                 content character  movie  \\\n",
       "0  34170_0002_A_12                     穿着背心的b1醒来，看看手机，三点了。  34170_b1  34170   \n",
       "1  34170_0002_A_14                                 b1走出卧室。  34170_b1  34170   \n",
       "2  34170_0003_A_16                          b1拿着手机，点开计时功能。  34170_b1  34170   \n",
       "3  34170_0003_A_17                b1站在淋浴头下面，水从b1的头和脸上冲刷而过。  34170_b1  34170   \n",
       "4  34170_0003_A_18                                 b1摈着呼吸。  34170_b1  34170   \n",
       "5  34170_0003_A_20                            b1睁开了眼，喘了口气。  34170_b1  34170   \n",
       "6  34170_0003_A_21                          b1看了看手机，大概四分钟。  34170_b1  34170   \n",
       "7  34170_0004_A_24                              i3躺在被窝里熟睡。  34170_i3  34170   \n",
       "8  34170_0004_A_25  b1蹲在床边，拉着i3的手，轻声说道：满儿，爸爸今晚下班以后回来给你过生日。  34170_b1  34170   \n",
       "9  34170_0004_A_26  b1蹲在床边，拉着i3的手，轻声说道：满儿，爸爸今晚下班以后回来给你过生日。  34170_i3  34170   \n",
       "\n",
       "  scene movie_id character_name  \n",
       "0  0002       12            杨亭萱  \n",
       "1  0002       14            杨亭萱  \n",
       "2  0003       16            杨亭萱  \n",
       "3  0003       17            杨亭萱  \n",
       "4  0003       18            杨亭萱  \n",
       "5  0003       20            杨亭萱  \n",
       "6  0003       21            杨亭萱  \n",
       "7  0004       24            马舟宇  \n",
       "8  0004       25            杨亭萱  \n",
       "9  0004       26            马舟宇  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test['character_name'] = test['character'].map(test_mapping)\n",
    "test.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:19.937638Z",
     "start_time": "2021-09-28T08:10:19.470058Z"
    }
   },
   "outputs": [],
   "source": [
    "def _rename_characters(row):\n",
    "    \"\"\"Return the row's content after passing it through replace_text with the test name mapping.\"\"\"\n",
    "    return replace_text(row['content'], row['movie'], test_mapping)\n",
    "\n",
    "\n",
    "# Overwrite the content column row by row with the rewritten text.\n",
    "test['content'] = test.apply(_rename_characters, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:19.956240Z",
     "start_time": "2021-09-28T08:10:19.939596Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>content</th>\n",
       "      <th>character</th>\n",
       "      <th>movie</th>\n",
       "      <th>scene</th>\n",
       "      <th>movie_id</th>\n",
       "      <th>character_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>34170_0002_A_12</td>\n",
       "      <td>穿着背心的杨亭萱醒来，看看手机，三点了。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0002</td>\n",
       "      <td>12</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>34170_0002_A_14</td>\n",
       "      <td>杨亭萱走出卧室。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0002</td>\n",
       "      <td>14</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>34170_0003_A_16</td>\n",
       "      <td>杨亭萱拿着手机，点开计时功能。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>16</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>34170_0003_A_17</td>\n",
       "      <td>杨亭萱站在淋浴头下面，水从杨亭萱的头和脸上冲刷而过。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>17</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>34170_0003_A_18</td>\n",
       "      <td>杨亭萱摈着呼吸。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>18</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>34170_0003_A_20</td>\n",
       "      <td>杨亭萱睁开了眼，喘了口气。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>20</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>34170_0003_A_21</td>\n",
       "      <td>杨亭萱看了看手机，大概四分钟。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0003</td>\n",
       "      <td>21</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>34170_0004_A_24</td>\n",
       "      <td>马舟宇躺在被窝里熟睡。</td>\n",
       "      <td>34170_i3</td>\n",
       "      <td>34170</td>\n",
       "      <td>0004</td>\n",
       "      <td>24</td>\n",
       "      <td>马舟宇</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>34170_0004_A_25</td>\n",
       "      <td>杨亭萱蹲在床边，拉着马舟宇的手，轻声说道：满儿，爸爸今晚下班以后回来给你过生日。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0004</td>\n",
       "      <td>25</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>34170_0004_A_26</td>\n",
       "      <td>杨亭萱蹲在床边，拉着马舟宇的手，轻声说道：满儿，爸爸今晚下班以后回来给你过生日。</td>\n",
       "      <td>34170_i3</td>\n",
       "      <td>34170</td>\n",
       "      <td>0004</td>\n",
       "      <td>26</td>\n",
       "      <td>马舟宇</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>34170_0004_A_27</td>\n",
       "      <td>马舟宇还在熟睡。</td>\n",
       "      <td>34170_i3</td>\n",
       "      <td>34170</td>\n",
       "      <td>0004</td>\n",
       "      <td>27</td>\n",
       "      <td>马舟宇</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>34170_0004_A_28</td>\n",
       "      <td>杨亭萱把马舟宇的手放下。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0004</td>\n",
       "      <td>28</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>34170_0005_A_31</td>\n",
       "      <td>桌上摆着水果、食物，墙上挂着“HappyBirthDay”字样的气球，显然这是要给马舟宇过生日。</td>\n",
       "      <td>34170_i3</td>\n",
       "      <td>34170</td>\n",
       "      <td>0005</td>\n",
       "      <td>31</td>\n",
       "      <td>马舟宇</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>34170_0005_A_33</td>\n",
       "      <td>杨亭萱把摆放的不整齐的物件，整了整，放置整齐，看得出他是个对细节一丝不苟的人。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0005</td>\n",
       "      <td>33</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>34170_0005_A_35</td>\n",
       "      <td>杨亭萱走到镜子前，神情严肃地看着镜子里的自己。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0005</td>\n",
       "      <td>35</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>34170_0004_A_29</td>\n",
       "      <td>杨亭萱把马舟宇的手放下。</td>\n",
       "      <td>34170_i3</td>\n",
       "      <td>34170</td>\n",
       "      <td>0004</td>\n",
       "      <td>29</td>\n",
       "      <td>马舟宇</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>34170_0005_A_34</td>\n",
       "      <td>杨亭萱抱着狗狗，给它上药：多可爱的小狗啊，怎么说扔就扔了。（上完药）好了，再坚持两次，眼睛就好了啊。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0005</td>\n",
       "      <td>34</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>34170_0005_A_36</td>\n",
       "      <td>杨亭萱戴起帽子。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0005</td>\n",
       "      <td>36</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>34170_0005_A_32</td>\n",
       "      <td>杨亭萱穿着一身挺拔的机长服装，走下二楼楼梯。</td>\n",
       "      <td>34170_b1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0005</td>\n",
       "      <td>32</td>\n",
       "      <td>杨亭萱</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>34170_0007_A_42</td>\n",
       "      <td>许多空姐、陈艺坤都打着雨伞走进大楼，行色匆匆。</td>\n",
       "      <td>34170_f1</td>\n",
       "      <td>34170</td>\n",
       "      <td>0007</td>\n",
       "      <td>42</td>\n",
       "      <td>陈艺坤</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 id                                             content  \\\n",
       "0   34170_0002_A_12                                穿着背心的杨亭萱醒来，看看手机，三点了。   \n",
       "1   34170_0002_A_14                                            杨亭萱走出卧室。   \n",
       "2   34170_0003_A_16                                     杨亭萱拿着手机，点开计时功能。   \n",
       "3   34170_0003_A_17                          杨亭萱站在淋浴头下面，水从杨亭萱的头和脸上冲刷而过。   \n",
       "4   34170_0003_A_18                                            杨亭萱摈着呼吸。   \n",
       "5   34170_0003_A_20                                       杨亭萱睁开了眼，喘了口气。   \n",
       "6   34170_0003_A_21                                     杨亭萱看了看手机，大概四分钟。   \n",
       "7   34170_0004_A_24                                         马舟宇躺在被窝里熟睡。   \n",
       "8   34170_0004_A_25            杨亭萱蹲在床边，拉着马舟宇的手，轻声说道：满儿，爸爸今晚下班以后回来给你过生日。   \n",
       "9   34170_0004_A_26            杨亭萱蹲在床边，拉着马舟宇的手，轻声说道：满儿，爸爸今晚下班以后回来给你过生日。   \n",
       "10  34170_0004_A_27                                            马舟宇还在熟睡。   \n",
       "11  34170_0004_A_28                                        杨亭萱把马舟宇的手放下。   \n",
       "12  34170_0005_A_31    桌上摆着水果、食物，墙上挂着“HappyBirthDay”字样的气球，显然这是要给马舟宇过生日。   \n",
       "13  34170_0005_A_33             杨亭萱把摆放的不整齐的物件，整了整，放置整齐，看得出他是个对细节一丝不苟的人。   \n",
       "14  34170_0005_A_35                             杨亭萱走到镜子前，神情严肃地看着镜子里的自己。   \n",
       "15  34170_0004_A_29                                        杨亭萱把马舟宇的手放下。   \n",
       "16  34170_0005_A_34  杨亭萱抱着狗狗，给它上药：多可爱的小狗啊，怎么说扔就扔了。（上完药）好了，再坚持两次，眼睛就好了啊。   \n",
       "17  34170_0005_A_36                                            杨亭萱戴起帽子。   \n",
       "18  34170_0005_A_32                              杨亭萱穿着一身挺拔的机长服装，走下二楼楼梯。   \n",
       "19  34170_0007_A_42                             许多空姐、陈艺坤都打着雨伞走进大楼，行色匆匆。   \n",
       "\n",
       "   character  movie scene movie_id character_name  \n",
       "0   34170_b1  34170  0002       12            杨亭萱  \n",
       "1   34170_b1  34170  0002       14            杨亭萱  \n",
       "2   34170_b1  34170  0003       16            杨亭萱  \n",
       "3   34170_b1  34170  0003       17            杨亭萱  \n",
       "4   34170_b1  34170  0003       18            杨亭萱  \n",
       "5   34170_b1  34170  0003       20            杨亭萱  \n",
       "6   34170_b1  34170  0003       21            杨亭萱  \n",
       "7   34170_i3  34170  0004       24            马舟宇  \n",
       "8   34170_b1  34170  0004       25            杨亭萱  \n",
       "9   34170_i3  34170  0004       26            马舟宇  \n",
       "10  34170_i3  34170  0004       27            马舟宇  \n",
       "11  34170_b1  34170  0004       28            杨亭萱  \n",
       "12  34170_i3  34170  0005       31            马舟宇  \n",
       "13  34170_b1  34170  0005       33            杨亭萱  \n",
       "14  34170_b1  34170  0005       35            杨亭萱  \n",
       "15  34170_i3  34170  0004       29            马舟宇  \n",
       "16  34170_b1  34170  0005       34            杨亭萱  \n",
       "17  34170_b1  34170  0005       36            杨亭萱  \n",
       "18  34170_b1  34170  0005       32            杨亭萱  \n",
       "19  34170_f1  34170  0007       42            陈艺坤  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-09-28T08:10:20.209251Z",
     "start_time": "2021-09-28T08:10:19.957869Z"
    }
   },
   "outputs": [],
   "source": [
    "train.to_csv('./data/train_with_names.csv', index=False)\n",
    "test.to_csv('./data/test_with_names.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
